########All data analyses were carried out using using R (version 3.2.2) #######
rm(list = ls())
library(foreign)
#install.packages("readstata13")
library(readstata13)
#install.packages("arm")
library(arm)
#install.packages("car")
library(car)

dat<-read.dta13("FDI_and_Corruption(ISQ).dta")
attach(dat)

mydata<-na.omit(cbind.data.frame(cpi0004,rfdistockcap,clrgdpl,distance,lat_abst,
  democracy,resend,lopenk,avelf,catho80,muslim80,protmg80,legor_uk,legor_fr,
  countryname,countrycode))
detach(dat)
rm(dat)
attach(mydata)

################################################################################
######################### Section B: First-Stage Results #######################
################## Figure A: FDI Stock per Capita and Remoteness ###############
################################################################################
png(file="SI_Figure_A.png",width=3.6,height=3.6,units="in",res=900)
par(mfrow=c(1,1),mar=c(2.5,2.5,.5,.5))
plot(distance,rfdistockcap, pch=16,xlab="Remoteness (Inverse of)",
ylab="Real FDI Stock per Capita", cex=.5, cex.lab=0.5, cex.axis=.5,mgp = c(1.2,.5,0))
abline(lm(rfdistockcap~distance),lwd=1)
text(distance[rfdistockcap>=30],rfdistockcap[rfdistockcap>=30], 
labels=countrycode[rfdistockcap>=30], pos=1, offset=0.3, cex=.5)
dev.off()

################################################################################
################## Figure B: FDI Stock per Capita and Remoteness ###############
################################################################################
outliers<-ifelse(countryname=="Ireland"|countryname=="Singapore",1,0)
png(file="SI_Figure_B.png",width=3.6,height=3.6,units="in",res=900)
par(mfrow=c(1,1),mar=c(2.5,2.5,.5,.5))
plot(distance[outliers==0],rfdistockcap[outliers==0], pch=16,
xlab="Remoteness (Inverse of)",ylab="Real FDI Stock per Capita",
cex=.5, cex.lab=0.5, cex.axis=.5,mgp = c(1.2,.5,0))
abline(lm(rfdistockcap[outliers==0]~distance[outliers==0]),lwd=1)
dev.off()
################################################################################
#Figure C: Studentized Residuals, Hat Values, and Cook's Distances (CPI Sample)#
################################################################################
########################## Function: Influence Plot ############################
influence.plot<-function(model,scale=10,lwd=c(1,2),
labels=names(rstud), ...){
hatval<-hatvalues(model)
rstud<-rstudent(model)
cook<-sqrt(as.vector(cooks.distance(model)))
scale<-scale/max(cook)
p<-length(coef(model))
n<-length(rstud)
cutoff<-sqrt(4/(n-p))
plot(hatval,rstud, xlab="Hat-Values",
ylab="Studentized Residuals", type="n", xaxs="i",
xlim=c(0,.40),ylim=c(-5,15), cex.lab=.8, cex.axis=.8,mgp = c(1.8,.5,0),
,...)
abline(v=c(2,3)*p/n,lty=2)
abline(h=c(-2,0,2),lty=2)
for (i in 1:n)
points(hatval[i],rstud[i],cex=scale*cook[i],
lwd=if (cook[i]>cutoff) lwd[2] else lwd[1])
text(hatval[cook>cutoff], rstud[cook>cutoff], labels=countrycode[cook>cutoff],cex=.5)
}

################################################################################
###### Figure C
fit<-lm(rfdistockcap~distance+lat_abst+democracy+resend+lopenk+avelf+catho80+
          muslim80+protmg80+legor_uk+legor_fr,x=T)

png(file="SI_Figure_C.png",width=4.6, height=5,units="in",res=900)
par(mfrow=c(1,1),mar=c(3.5,3.5,.6,.6))
influence.plot(fit)
dev.off()

################################################################################
######################## Figure D: Added-Variable Plots ########################
################################################################################
# Figure D
xnames<-cbind("Remoteness","Abst Latitude","Democracy","Natural Resources",
              "Trade Openness","Ethno. Frac.","Cathlics","Muslims",
              "Protestants","British Legal Origin","French Legal Origin")
varlist<-cbind("distance","lat_abst","democracy","resend","lopenk","avelf",
               "catho80","muslim80","protmg80","legor_uk","legor_fr")
N<-length(xnames)

png(file="SI_Figure_D.png",width=8.15, height=5.8,units="in",res=900)
par(mfrow=c(3,4),mar=c(2.7,2.5,2.5,1))
for (i in 1:N){
avPlot(fit,varlist[i],xlab=paste(xnames[i],'| Others'), 
       main=paste(xnames[i]),
       ylab="FDI Stock per Capita | Others",pch=19,lty=2,col="black",
       col.lines="black",id.method=abs(residuals(fit)), id.n=2, 
       mgp = c(1.6,.5,0),labels=countrycode, cex.lab=.8)
}
dev.off()

################################################################################
######## Models in Table A: First-Stage Regression Results (CPI Sample) ########
################################################################################
# Model 1
fit1<-lm(rfdistockcap~distance+clrgdpl+democracy+resend+lopenk+avelf+catho80+
           muslim80+protmg80,data=subset(mydata,outliers==0))
summary(fit1)

# Model 2
fit2<-lm(rfdistockcap~distance+clrgdpl+democracy+resend+lopenk+avelf+catho80+
           muslim80+protmg80+legor_uk+legor_fr, data=subset(mydata,outliers==0))
summary(fit2)

# Model 3
fit3<-lm(rfdistockcap~distance+lat_abst+democracy+resend+lopenk+avelf+catho80+
           muslim80+protmg80,data=subset(mydata,outliers==0))
summary(fit3)

# Model 4
fit4<-lm(clrgdpl~distance+lat_abst+democracy+resend+lopenk+avelf+catho80+
           muslim80+protmg80,data=subset(mydata,outliers==0))
summary(fit4)

# Model 5
fit5<-lm(rfdistockcap~distance+lat_abst+democracy+resend+lopenk+avelf+catho80+
           muslim80+protmg80+legor_uk+legor_fr,data=subset(mydata,outliers==0))
summary(fit5)

# Model 6
fit6<-lm(clrgdpl~distance+lat_abst+democracy+resend+lopenk+avelf+catho80+
         muslim80+protmg80+legor_uk+legor_fr,data=subset(mydata,outliers==0))
summary(fit6)

################################################################################
#Figure E: Studentized Residuals, Hat Values, and Cook's Distances (WBES Sample)#
################################################################################
detach(mydata)
rm(mydata)
rm(list = ls())

dat<-read.dta13("FDI_and_Corruption_WBES(ISQ).dta")
attach(dat)

mydata<-na.omit(cbind.data.frame(logbrib_inci,logbrib_dep,rfdistockcap,clrgdpcap, 
distance,latitude,polity2,resend,lopen,elf85,cath80,musl80,prot80,leg_british,
leg_french,countryname,countrycode))
detach(dat)
rm(dat)
attach(mydata)

fit<-lm(rfdistockcap~distance+latitude+polity2+resend+lopen+elf85+cath80+musl80+
          prot80+leg_british+leg_french)

# Run Function "influence.plot" (Lines 50-69)
png(file="SI_Figure_E.png",width=4.6, height=5,units="in",res=900)
par(mfrow=c(1,1),mar=c(3.5,3.5,.6,.6))
influence.plot(fit)
dev.off()

################################################################################
############### Figure F: Added-Variable Plots (WBES Sample) ###################
################################################################################
xnames<-cbind("Remoteness","Abst Latitude","Democracy","Natural Resources",
              "Trade Openness","Ethno. Frac.","Cathlics","Muslims",
              "Protestants","British Legal Origin","French Legal Origin")
varlist<-cbind("distance","latitude","polity2","resend","lopen","elf85",
               "cath80","musl80","prot80","leg_british","leg_french")
N<-length(xnames)

png(file="SI_Figure_F.png",width=8.15, height=5.8,units="in",res=900)
par(mfrow=c(3,4),mar=c(2.7,2.5,2.5,1))
for (i in 1:N){
  avPlot(fit,varlist[i],xlab=paste(xnames[i],'| Others'), 
         main=paste(xnames[i]),ylab="FDI Stock per Capita | Others",
         pch=19,lty=2,col="black",col.lines="black",
         id.method=abs(residuals(fit)),id.n=3,mgp = c(1.6,.5,0),
         labels=countrycode,cex.lab=.65)
}
dev.off()

################################################################################
################### Section c: FDI Inflows and Corruption ######################
################################################################################

################################################################################
################## Table B: FDI Inflow per Capita and Corruption ###############
################################################################################

detach(mydata)
rm(mydata)
rm(list = ls())

dat<-read.dta13("FDI_and_Corruption(ISQ).dta")
attach(dat)

mydata<-na.omit(cbind.data.frame(cpi0004,clrgdpl,rfdistockcap,rnetfdicap,
        distance,lat_abst,democracy,resend,lopenk,avelf,catho80,muslim80,
        protmg80,legor_uk,legor_fr,country,countrycode))
detach(dat)
rm(dat)
attach(mydata)

################################################################################
##R Function: IV Regression with One Endogenous Regressor & Interaction Term ###
################################################################################

# x1: Endogenous Regressor
# x2: Exogenous Regressor
# Y: Dependent Variable
# z1: Instrument for x1
# covarmat: A Matrix of Covariates
# outliers: Statistical Outliers

ivreg1<-function(x1,x2,y,z1,covarmat,outliers){
#define dataset
datamat<-na.omit(cbind.data.frame(x1,x2,y,z1,covarmat,outliers))
#drop outliers
datamat<-subset(datamat, datamat$outliers==0)
x1<-datamat[,"x1"]
x2<-datamat[,"x2"]
y<- datamat[,"y"]
#excluded instrument
zmat<-as.matrix(datamat[,4])
#included instruments
xmat<-as.matrix(datamat[,5:(ncol(datamat)-1)])

# first-stage regression
fit1a<-lm(x1~zmat+x2+xmat,x=T)
x1.hat<-fit1a$fitted

Z<-fit1a$x
Zmat<-x2*Z[,-1]

fit1c<-lm(x1*x2~-1+Zmat)
x1x2.hat<-fit1c$fitted

#second-stage regression
fit2<-lm(y~x1.hat+x1x2.hat+x2+xmat,x=TRUE)

#correct SEs of second-stage regression
x.adj<-fit2$x
x.adj[,"x1.hat"]<-x1
x.adj[,"x1x2.hat"]<-x1*x2
resids<-y-x.adj%*%coef(fit2)
resids.sd<-sqrt(sum(resids^2)/(length(y)-ncol(x.adj)))
se.adj<-se.coef(fit2)*resids.sd/sigma.hat(fit2)

##### Function: R-squared & adjusted R-squared
rsquared<-function(y,x.adj,fit){
y.hat<-x.adj%*%coef(fit)
Rsqr<-(sum((y-mean(y))*(y.hat-mean(y.hat))))^2/(sum((y-mean(y))^2)*sum((y.hat-mean(y.hat))^2))
Rsqr.adj<-1-((length(y)-1)*(1-Rsqr)/(length(y)-ncol(x.adj)))
print(cbind(Rsqr,Rsqr.adj),2)
}

# p-values
pvalue<-2*pt(abs(coef(fit2)/se.adj),df=(length(y)-ncol(x.adj)),lower.tail=F)
output<-(cbind(coef(fit2),se.adj,abs(coef(fit2)/se.adj),pvalue))
colnames(output)<-c("Coefs","SEs","|t|", "P>|t|")
print(round(output,2))
rsquared(y,x.adj,fit2)
# no. of obs.
nrow(datamat)
}

################################################################################
# R Function: IV Regression with Two Endogenous Regressors & Interaction Term ##
################################################################################
# x1: Endogenous Regressor
# x2: Endogenous Regressor
# Y: Dependent Variable
# z1: Instrument for x1
# z2: Instrument for x2
# covarmat: A Matrix of Covariates
# outliers: Statistical Outliers

ivreg2<-function(x1,x2,y,z1,z2,covarmat,outliers){
datamat<-na.omit(cbind.data.frame(x1,x2,y,z1,z2,covarmat,outliers))
#drop outliers
datamat<-subset(datamat, datamat$outliers==0)
x1<-datamat[,"x1"]
x2<-datamat[,"x2"]
y<- datamat[,"y"]
zmat<-cbind(datamat$z1,datamat$z2)
xmat<-as.matrix(datamat[,6:(ncol(datamat)-1)])
# first stage regression
fit1a<-lm(x1~zmat+xmat,x=T)
x1.hat<-fit1a$fitted

fit1b<- lm(x2~ zmat+xmat,x=T)
x2.hat<-fit1b$fitted

Z<-fit1b$x
Zmat<-matrix(NA,nrow(xmat),ncol(Z)^2)

for (i in 1:ncol(Z)){
Zmat[,(ncol(Z)*(i-1)+1):(ncol(Z)*i)]<-Z[,i]*Z
}

fit1c<-lm(x1*x2~Zmat)
x1x2.hat<-fit1c$fitted

# second stage regression
fit2<-lm(y~x1.hat+x2.hat+x1x2.hat+xmat,x=TRUE)

x.adj<-fit2$x
x.adj[,"x1.hat"]<-x1
x.adj[,"x2.hat"]<-x2
x.adj[,"x1x2.hat"]<-x1*x2

resids<-y-x.adj%*%coef(fit2)
resids.sd<-sqrt(sum(resids^2)/(length(y)-ncol(x.adj)))
se.adj<-se.coef(fit2)*resids.sd/sigma.hat(fit2)

##### Function: R-squared & adjusted R-squared
rsquared<-function(y,x.adj,fit){
y.hat<-x.adj%*%coef(fit)
Rsqr<-(sum((y-mean(y))*(y.hat-mean(y.hat))))^2/(sum((y-mean(y))^2)*sum((y.hat-mean(y.hat))^2))
Rsqr.adj<-1-((length(y)-1)*(1-Rsqr)/(length(y)-ncol(x.adj)))
print(cbind(Rsqr,Rsqr.adj),2)
}

# p-values
pvalue<-2*pt(abs(coef(fit2)/se.adj),df=(length(y)-ncol(x.adj)),lower.tail=F)
output<-(cbind(coef(fit2),se.adj,abs(coef(fit2)/se.adj),pvalue))
colnames(output)<-c("Coefs","SEs","|t|", "P>|t|")
print(round(output,2))
rsquared(y,x.adj,fit2)
# no. of obs.
nrow(datamat)
}

################################################################################
outliers<-ifelse(country=="Ireland"|country=="Singapore",1,0)

#Covariates
covarmat1<-cbind(democracy,resend,lopenk,avelf,catho80,muslim80,protmg80)
#Covariates with two legal origin variables
covarmat2<-cbind(democracy,resend,lopenk,avelf,catho80,muslim80,protmg80,
                 legor_uk, legor_fr)

# Table B
# Model 1
#install.packages("AER")
library(AER)

fit.b.1<-ivreg(cpi0004~rnetfdicap+clrgdpl+democracy+resend+lopenk+avelf+catho80+muslim80
       +protmg80|distance+clrgdpl+democracy+resend+lopenk+avelf+catho80+muslim80
       +protmg80,data=subset(mydata, outliers==0))
summary(fit.b.1)

# Model 2
fit.b.2<-ivreg(cpi0004~rnetfdicap+clrgdpl+democracy+resend+lopenk+avelf+catho80+muslim80
+protmg80+legor_uk+legor_fr|distance+clrgdpl+democracy+resend+lopenk+avelf+catho80+muslim80
+protmg80+legor_uk+legor_fr,data=subset(mydata, outliers==0))
summary(fit.b.2)

# Model 3
# Run Function "ivreg1" (Lines: 215--265)
ivreg1(rnetfdicap,clrgdpl,cpi0004,distance,covarmat1,outliers)

# Model 4
# Run Function "ivreg2" (Lines: 278--332)
ivreg2(rnetfdicap,clrgdpl,cpi0004,distance,lat_abst,covarmat1,outliers)

# Model 5
ivreg2(rnetfdicap,clrgdpl,cpi0004,distance,lat_abst,covarmat2,outliers)

################################################################################
#Figure G: Studentized Residuals, Hat Values, and Cook's Distances (CPI Sample)#
################################################################################
fit<-lm(rnetfdicap~distance+lat_abst+democracy+resend+lopenk+avelf+catho80+
          muslim80+protmg80+legor_uk+legor_fr)
# Figure G
# Run Function "influence.plot" (Lines: 49--68)
png(file="SI_Figure_G.png",width=4.6, height=5,units="in",res=900)
par(mfrow=c(1,1),mar=c(3.5,3.5,.6,.6))
influence.plot(fit)
dev.off()
################################################################################
################ Figure H: Added-Variable Plots (CPI Sample) ###################
################################################################################
xnames<-cbind("Remoteness","Abst Latitude","Democracy","Natural Resources",
              "Trade Openness","Ethno. Frac.","Cathlics","Muslims",
              "Protestants","British Legal Origin","French Legal Origin")
varlist<-cbind("distance","lat_abst","democracy","resend","lopenk","avelf",
               "catho80","muslim80","protmg80","legor_uk","legor_fr")
N<-length(xnames)

png(file="SI_Figure_H.png",width=8.15, height=5.8,units="in",res=900)
par(mfrow=c(3,4),mar=c(2.7,2.5,2.5,1))
for (i in 1:N){
  avPlot(fit,varlist[i],xlab=paste(xnames[i],'| Others'), 
         main=paste(xnames[i]),
         ylab="FDI Stock per Capita | Others",pch=19,lty=2,col="black",
         col.lines="black",id.method=abs(residuals(fit)), id.n=2,
         mgp = c(1.6,.5,0),labels=countrycode, cex.lab=0.8)
}
dev.off()
################################################################################
###### Section D: Influential Outliers & Alternative Model Specifications ######
################################################################################

################################################################################
# R Function: IV Regression with Two Endogenous Regressors & Interaction Term ##
################################################################################
ivreg2.out<-function(x1,x2,y,z1,z2,covarmat,outliers){
datamat<-na.omit(cbind.data.frame(x1,x2,y,z1,z2,covarmat,outliers))
x1<-datamat[,"x1"]
x2<-datamat[,"x2"]
y<- datamat[,"y"]
outliers<- datamat[,"outliers"]
zmat<-cbind(datamat$z1,datamat$z2)
xmat<-as.matrix(datamat[,6:(ncol(datamat)-1)])
# first stage regression
fit1a<-lm(x1~zmat+xmat+outliers,x=T)
x1.hat<-fit1a$fitted

fit1b<- lm(x2~ zmat+xmat+outliers,x=T)
x2.hat<-fit1b$fitted

Z<-fit1b$x
Zmat<-matrix(NA,nrow(xmat),ncol(Z)^2)

for (i in 1:ncol(Z)){
Zmat[,(ncol(Z)*(i-1)+1):(ncol(Z)*i)]<-Z[,i]*Z
}

fit1c<-lm(x1*x2~Zmat)
x1x2.hat<-fit1c$fitted

# second stage regression
fit2<-lm(y~x1.hat+x2.hat+x1x2.hat+xmat,x=TRUE)

x.adj<-fit2$x
x.adj[,"x1.hat"]<-x1
x.adj[,"x2.hat"]<-x2
x.adj[,"x1x2.hat"]<-x1*x2

resids<-y-x.adj%*%coef(fit2)
resids.sd<-sqrt(sum(resids^2)/(length(y)-ncol(x.adj)))
se.adj<-se.coef(fit2)*resids.sd/sigma.hat(fit2)

##### Function: R-squared & adjusted R-squared
rsquared<-function(y,x.adj,fit){
y.hat<-x.adj%*%coef(fit)
Rsqr<-(sum((y-mean(y))*(y.hat-mean(y.hat))))^2/(sum((y-mean(y))^2)*sum((y.hat-mean(y.hat))^2))
Rsqr.adj<-1-((length(y)-1)*(1-Rsqr)/(length(y)-ncol(x.adj)))
print(cbind(Rsqr,Rsqr.adj),2)
}

# p-values
pvalue<-2*pt(abs(coef(fit2)/se.adj),df=(length(y)-ncol(x.adj)),lower.tail=F)
output<-(cbind(coef(fit2),se.adj,abs(coef(fit2)/se.adj),pvalue))
colnames(output)<-c("Coefs","SEs","|t|", "P>|t|")
print(round(output,2))
rsquared(y,x.adj,fit2)
# no. of obs.
nrow(datamat)
}
################################################################################
################################## Table C #####################################
# CPI Sample
# Run Function "ivreg2.out" (Lines: 408--461)
#Model 1
ivreg2.out(rfdistockcap,clrgdpl,cpi0004,distance,lat_abst,covarmat2,outliers)

#Model 2
ivreg2.out(rnetfdicap,clrgdpl,cpi0004,distance,lat_abst,covarmat2,outliers)

# WBES Sample
detach(mydata)
rm(mydata)
rm(list = ls())

dat<-read.dta13("FDI_and_Corruption_WBES(ISQ).dta")
attach(dat)

covarmat1<-cbind(polity2, resend, lopen,elf85,cath80,musl80,prot80)
covarmat2<-cbind(polity2,resend, lopen,elf85,cath80,musl80,prot80,
                 leg_british,leg_french)

outliers<-ifelse(countryname=="Kazakhstan"|countryname=="Lebanon"|
                   countryname=="Lesotho"|countryname=="Sweden"|
                   countryname=="Trinidad and Tobago",1,0)

# Run Function "ivreg2.out" (Lines: 408--461)
# Model 3
ivreg2.out(rfdistockcap,clrgdpcap,logbrib_inci,distance,latitude,covarmat1,outliers)

# Model 4 
ivreg2.out(rfdistockcap,clrgdpcap,logbrib_inci,distance,latitude,covarmat2,outliers)

#Model 5
ivreg2.out(rfdistockcap,clrgdpcap,logbrib_dep,distance,latitude,covarmat1,outliers)

# Model 6
ivreg2.out(rfdistockcap,clrgdpcap,logbrib_dep,distance,latitude,covarmat2,outliers)

################################################################################
### Section E: Sensitivity Analysis of IV Exclusion Restriction ################
################################################################################
detach(dat)
rm(dat)
rm(list = ls())

dat<-read.dta13("FDI_and_Corruption(ISQ).dta")
attach(dat)

outliers<-ifelse(countryname=="Ireland"|countryname=="Singapore",1,0)
fdi<- rfdistockcap
gdpcap<-clrgdpl
covarmat<-cbind(democracy,resend,lopenk,avelf,catho80,muslim80,protmg80,
                legor_uk,legor_fr)
datamat<-na.omit(cbind.data.frame(fdi,gdpcap,cpi0004,distance,lat_abst,covarmat,
                                  outliers))
datamat<-subset(datamat,outliers==0)
fdi<-datamat[,"fdi"]
gdpcap<-datamat[,"gdpcap"]
y<- datamat[,"cpi0004"]
zmat<-as.matrix(datamat[,4:5])
xmat<-as.matrix(datamat[,6:(ncol(datamat)-1)])

# First-Stage Regression
fit1a<-lm(fdi~zmat+xmat,x=T)
fdi.hat<-fit1a$fitted

fit1b<- lm(gdpcap~ zmat+xmat,x=T)
gdpcap.hat<-fit1b$fitted

Z<-fit1b$x
Zmat<-matrix(NA,nrow(xmat),ncol(Z)^2)

for (i in 1:ncol(Z)){
Zmat[,(ncol(Z)*(i-1)+1):(ncol(Z)*i)]<-Z[,i]*Z
}

fit1c<-lm(fdi*gdpcap~Zmat)
fdigdp.hat<-fit1c$fitted

#########
# Specify a set of values for gamma
N<-500
gamma_z<-seq(-0.01, .01, length.out = N)
coefmat<-matrix(NA,N,6)

for (i in 1:N){
#Substract the direct effect of gamma from y 
y1<-y - gamma_z[i]*zmat[,1]

#Second-Stage Regression
fit2<-lm(y1~fdi.hat+fdigdp.hat+gdpcap.hat+xmat,x=TRUE)

x.adj<-fit2$x
x.adj[,"fdi.hat"]<-fdi
x.adj[,"gdpcap.hat"]<-gdpcap
x.adj[,"fdigdp.hat"]<-gdpcap*fdi

resids<-y1-x.adj%*%coef(fit2)
resids.sd<-sqrt(sum(resids^2)/(length(y1)-ncol(x.adj)))
se.adj<-se.coef(fit2)*resids.sd/sigma.hat(fit2)

pvalue<-2*pt(abs(coef(fit2)/se.adj),df=(length(y1)-ncol(x.adj)),lower.tail=F)

n<-length(se.adj)
sig<-rep(NA,n)
sig[pvalue>0.05&pvalue<=0.1]<-0.1
sig[pvalue>0.01&pvalue<=0.05]<-0.05
sig[pvalue<=0.01]<-0.01

coefmat[i,]<-cbind(gamma_z[i],coef(fit2)[2],sig[2],coef(fit2)[3],sig[3],resids.sd)
}

output<-data.frame(round(cbind(coefmat,coefmat[,2]/(-coefmat[,4])),3) )
names(output)<-c("gamma","fdi","pvalue1","fdigdp","pvalue2","rse","cutoff")

######## Figure I
png(file="SI_Figure_I.png",width=6.5, height=4.5,units="in",res=900)
par(mfrow=c(1,1),mar=c(4,4,1,4),xpd=F)
meffect.low<-output[,2]-.16058785*output[,4] 
meffect.high<-output[,2]+1.6491884*output[,4]
plot(gamma_z, meffect.low, type="n", axes=F, 
     ylim=c(-0.5,2),xlab="Direct Effects (gamma)",
     ylab="Marginal Effects of FDI on Corruption",
     cex.lab=0.8, cex.axis=.8, mgp = c(2.2,.5,0))
box()
axis(1,at=seq(-0.010,0.010,by=0.005),cex.axis=0.8)
axis(2,at=seq(-0.5,2,by=.5),cex.axis=0.8)
axis(4,at=seq(-0.5,2,by=.5),cex.axis=0.8)
mtext("Residual Standard Errors",side=4, line =2.2,cex=0.8)

lines(gamma_z, meffect.low,lty=1,lwd=1.5)
lines(gamma_z, meffect.high,lty=2,lwd=1.5)     
abline(0,0)
abline(v=0.0033066,col="gray")
abline(v=-0.00026052,col="gray")
lines(gamma_z, output[,6],lwd=1.5) 
text(-0.005,1,labels="GDP per Capita=-0.16",cex=0.6)
text(-0.007,.21,labels="GDP per Capita=1.65",cex=0.6)
dev.off()


################################################################################
######### Section F: Political Development, FDI, and Corruption ################
################################################################################
rm(list = ls())

# Table D
covarmat1<-cbind(clrgdpl,resend,lopenk,avelf,catho80,muslim80,protmg80)
covarmat2<-cbind(clrgdpl,resend,lopenk,avelf,catho80,muslim80,protmg80,
                 legor_uk,legor_fr)
outliers<-ifelse(country=="Ireland"|country=="Singapore",1,0)

# Run Function "ivreg1" (Lines: 215--265)
# Model 1
ivreg1(rfdistockcap,democracy,cpi0004,distance,covarmat1,outliers)

# Model 2
ivreg1(rfdistockcap,alldem95,cpi0004,distance,covarmat1,outliers)

# Model 3
ivreg1(rfdistockcap,democracy,cpi0004,distance,covarmat2,outliers)

# Model 4
ivreg1(rfdistockcap,alldem95,cpi0004,distance,covarmat2,outliers)

################################################################################
## Figure J: Plot of Marginal Effects of FDI at Different Levels of Democracy ##
################################################################################

################################################################################
####################### Function: Marginal Effect Plot #########################
################################################################################
# x1: Endogenous Regressor
# x2: Exogenous Regressor
# Y: Dependent Variable
# z1: Instrument for x1
# covarmat: A Matrix of Covariates
# outliers: Statistical Outliers

meffect.plot<-function(x1,x2,y,z1,covarmat,outliers,xlab){
#define dataset
datamat<-na.omit(cbind.data.frame(x1,x2,y,z1,covarmat,outliers))
#drop outliers
datamat<-subset(datamat, datamat$outliers==0)
x1<-datamat[,"x1"]
x2<-datamat[,"x2"]
#DV
y<- datamat[,"y"]
#excluded instrument
zmat<-as.matrix(datamat[,4])
#included instruments
xmat<-as.matrix(datamat[,5:(ncol(datamat)-1)])
  
# first-stage regression
fit1a<-lm(x1~zmat+x2+xmat,x=T)
x1.hat<-fit1a$fitted

Z<-fit1a$x
Zmat<-x2*Z[,-1]
  
fit1c<-lm(x1*x2~-1+Zmat)
x1x2.hat<-fit1c$fitted
  
#second-stage regression
fit2<-lm(y~x1.hat+x1x2.hat+x2+xmat,x=TRUE)
#correct SEs of second-stage regression
x.adj<-fit2$x
x.adj[,"x1.hat"]<-x1
x.adj[,"x1x2.hat"]<-x1*x2
resids<-y-x.adj%*%coef(fit2)

summ<-summary(fit2)
x<-fit2$x
sigma.sq<-sum(resids^2)/(length(y)-ncol(x.adj))
# Variance-Covariance Matrix
vcv<-sigma.sq*solve(t(x)%*%x)

# Simulate Coefficients
nsim<-1000
N<-1000
y.fake<-matrix(NA,N,nsim)
gdp.sim<-seq(-10,10, length.out=N)
coef.matrix<-mvrnorm(nsim,summ$coef[1:nrow(summ$coef)],vcv)
xmat1<-as.matrix(cbind(1, gdp.sim))
coef.m<-cbind(coef.matrix[,2],coef.matrix[,3])

for (i in 1:N){
  y.fake[i,]<-coef.m%*%xmat1[i,]
}

ci1<-matrix(NA,N,2)
y.fake.mean<-rep(NA,N)

for (i in 1:N){
ci1[i,]<-quantile(y.fake[i,],c(0.025,0.975))
y.fake.mean[i]<-mean(y.fake[i,])
}

# Plot Marginal Effects
plot(gdp.sim, y.fake.mean, type="n",  cex.main=1.1,
    ylim=c(min(ci1[,1])-0.02,max(ci1[,2])+0.02),cex.lab=1,
    xlim=c(-10,10),yaxs="i", xlab=xlab, ylab="Marginal Effects",    
    cex.lab=0.8, cex.axis=.8,mgp = c(2,.8,0))
grid(col = "gray80")
lines(gdp.sim, ci1[,1], lty=2,col="gray88")
lines(gdp.sim, ci1[,2], lty=2,col="gray88")
box()
frame.y1 <- c(ci1[,1],rev(ci1[,2]))
frame.x1 <- c(gdp.sim, rev(gdp.sim))
polygon(frame.x1, frame.y1, density=-1, col="gray88", border=NA)
abline(0,0)
lines(gdp.sim, y.fake.mean, lty=1,lwd=2,col="black")
}
################################################################################
# Figure J
png(file="SI_Figure_J.png",width=6.5,height=4,units="in",res=900)
par(mfrow=c(1,1),mar=c(3.5,3.5,1,1))
meffect.plot(rfdistockcap,democracy,cpi0004,distance,covarmat2,outliers,
             xlab="Democracy (Polity IV)")
dev.off()

################################################################################
###################### Table E: Alternative Measures of Democracy ##############
################################################################################

# Run Function "ivreg1" (Lines: 215--265)
# Model 1
ivreg1(rfdistockcap,parcomp,cpi0004,distance,covarmat2,outliers)

# Model 2
ivreg1(rfdistockcap,polcomp,cpi0004,distance,covarmat2,outliers)

# Model 3
ivreg1(rfdistockcap,PR0004,cpi0004,distance,covarmat2,outliers)

# Model 4
ivreg1(rfdistockcap,participation2000,cpi0004,distance,covarmat2,outliers)

# Model 5
# Run Function "ivreg2" (Lines: 278--332)
covarmat3<-cbind(resend,lopenk,avelf,catho80,muslim80,protmg80,
                 legor_uk,legor_fr)
ivreg2(rfdistockcap,poleconindex,cpi0004,distance,lat_abst,covarmat3,outliers)

################################################################################
############# Section G (Table F): Alternative Measures of Corruption ##########
################################################################################
covarmat1<-cbind(democracy,resend,lopenk,avelf,catho80,muslim80,protmg80,
                 legor_uk, legor_fr)
covarmat2<-cbind(clrgdpl,resend,lopenk,avelf,catho80,muslim80,protmg80,
                 legor_uk,legor_fr)
# Model 1
ivreg2(rfdistockcap,clrgdpl,corruption_icrg,distance,lat_abst,covarmat1,outliers)

# Model 2
ivreg1(rfdistockcap,democracy,corruption_icrg,distance,covarmat2,outliers)

# Model 3
ivreg2(rfdistockcap,clrgdpl,wbcorruption0004,distance,lat_abst,covarmat1,outliers)

# Model 4
ivreg1(rfdistockcap,democracy,wbcorruption0004,distance,covarmat2,outliers)

################################################################################
####### Sections H & I (Table G): Economic Concentration and Integration #######
################################################################################
# Model 1
polity<-democracy
covarmat.con<-cbind(clrgdpl,polity,resend,lopenk,avelf,catho80,muslim80,protmg80,
                    legor_uk, legor_fr)
ivreg1(rfdistockcap,eci,cpi0004,distance,covarmat.con,outliers)

# Model 2 
covarmat.int1<-cbind(democracy,resend,avelf,catho80,muslim80,protmg80,
                     legor_uk,legor_fr)
ivreg2(integration,clrgdpl,cpi0004,distance,lat_abst,covarmat.int1,outliers)

#Model 3
covarmat.int2<-cbind(clrgdpl,resend,avelf,catho80,muslim80,protmg80,
                     legor_uk, legor_fr)
ivreg1(integration,democracy,cpi0004,distance,covarmat.int2,outliers)

################################################################################
############### Section J: Additional Supplementary Information ################
################################################################################

###### Figure K : Plots of FDI stock per Capita and Corruption (CPI) ###########

mydata<-na.omit(cbind.data.frame(rfdistockcap,clrgdpl,cpi0004,democracy,lopenk,
        resend,avelf,catho80,muslim80,protmg80,legor_uk, legor_fr,countrycode))

png(file="SI_Figure_K.png",width=6.5,height=7.5,units="in",res=900)
par(mfrow=c(2,1),mar=c(3.5,3,3,1))
plot(mydata$rfdistockcap,mydata$cpi0004,pch=16,xlab="Real FDI Stock per Capita",
    ylab="CPI",main="Corruption Perception Index and Real FDI Stock per Capita",
    cex.main=1,cex=0.8,cex.lab=0.8, cex.axis=.8,mgp = c(1.8,.5,0))
text(mydata$rfdistockcap[mydata$rfdistockcap>30],
     mydata$cpi0004[mydata$rfdistockcap>30],
     labels=mydata$countrycode[mydata$rfdistockcap>30], pos=2,offset=0.3,cex=.8)

mydata$outliers<-ifelse(mydata$countrycode=="IRL"|mydata$countrycode=="SGP",1,0)

plot(mydata$rfdistockcap[mydata$outliers==0],mydata$cpi0004[mydata$outliers==0], 
     pch=16,xlab="Real FDI Stock per Capita",ylab="CPI",main=
"Corruption Perception Index and Real FDI Stock per Capita \nwithout IRL & SGP",
cex.main=1,cex=0.8,cex.lab=0.8, cex.axis=.8,mgp = c(1.8,.5,0))
dev.off()

#################### Table H: Descriptive Statistics (CPI Sample) ##############

mydata<-cbind.data.frame(cpi0004,wbcorruption0004,corruption_icrg,rfdistockcap,
rnetfdicap,clrgdpl,distance,lat_abst,eci,democracy,alldem95,polcomp, parcomp,
PR0004, participation2000,resend, lopenk,avelf,catho80,muslim80,protmg80,
legor_uk,legor_fr)

#install.packages("pastecs")
library(pastecs)
stats<-stat.desc(mydata[is.na(mydata$cpi0004)==0,])[c(1,4,5,9,13),]
pfround(stats,2)

#################### Table I: Correlation Matrix (CPI Sample) ##################
mydata2<- cbind.data.frame(cpi0004,rfdistockcap,clrgdpl,eci,democracy,
alldem95,resend, lopenk,avelf,catho80,muslim80,protmg80,legor_uk,legor_fr)
cormatrix<-cor(mydata2[is.na(mydata$cpi0004)==0,-1], use="complete.obs", 
               method="pearson")
pfround(cormatrix,2)

################################################################################
detach(dat)
rm(dat)
rm(list = ls())

dat<-read.dta13("FDI_and_Corruption_WBES(ISQ).dta")
attach(dat)

dat<-subset(dat,is.na(logbrib_inci)==0)
mydata<-cbind(logbrib_inci,logbrib_dep,rfdistockcap,clrgdpcap,distance,latitude,
        polity2,resend,lopen,elf85,cath80,musl80,prot80,leg_british,leg_french)
stats<-stat.desc(mydata)[c(1,4,5,9,13),]
pfround(stats,2)

################################################################################
mydata2<-cbind(rfdistockcap,clrgdpcap,polity2,resend,lopen,elf85,cath80,musl80,
               prot80,leg_british,leg_french)
cormatrix<-cor(mydata2, use="complete.obs", method="pearson")
pfround(cormatrix,2)
